\documentclass[t,12pt,aspectratio=169]{beamer} % 16:9 宽屏比例，适合现代投影
\usepackage{ctex} % 中文支持
\usepackage{amsmath, amssymb} % 数学公式与符号
\usepackage{graphicx}
\usepackage{pythonhighlight}
\usepackage{url}
\usepackage{hyperref}
\usepackage{verbatim}

% 主题设置（推荐简洁风格）
\usetheme{Madrid}
\usecolortheme{default} % 可选：seahorse, beaver, dolphin 等

\title{应用回归分析第3章：多元线性回归 }
\author{HXQ ET AL}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\begin{frame}
  \titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{第3章目录 }

\begin{enumerate}

\item[3.1.] 多元线性回归模型
\item[3.2.] 回归系数的估计
\item[3.3.] 有关估计量的性质
\item[3.4.] 回归方程的显著性检验
\item[3.5.] 中心化和标准化
\item[3.6.] 相关阵和偏相关系数

\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.1.1. 线性回归模型的变量、数据、与任务}

\begin{itemize}

\item 设有自变量 $x_1,x_2,\cdots,x_p$ 和因变量 $y$. 
%\[ (x_{i1},x_{i2},\cdots, x_{ip},y_i), \,\, i=1,2,\cdots,n. \]

\item  设有 $n$ 组观测数据，写成表格的形式：
\begin{center}
\begin{tabular}{|c|cccc|c|} \hline 
变量  & $x_{1}$ & $x_{2}$ & $\cdots$ & $x_{p}$ & $y$ \\ \hline 
数据$1$ & $x_{11}$ & $x_{12}$ & $\cdots$ & $x_{1p}$ & $y_1$ \\ \hline 
数据$2$ & $x_{21}$ & $x_{22}$ & $\cdots$ & $x_{2p}$ & $y_2$ \\ \hline 
$\vdots$ & $\vdots$  & $\vdots$ & $\vdots$  & $\vdots$ & $\vdots$ \\ \hline
数据$n$ & $x_{n1}$ & $x_{n2}$ & $\cdots$ & $x_{np}$ & $y_n$ \\ \hline 
\end{tabular}
\end{center}

\item 问题：用自变量 $x_1,x_2,\cdots,x_p$ 来解释和预测因变量 $y$.

\item 方法：很多很多。线性回归模型是其中的最基本的一种。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.1.2. 多元线性回归模型的理论方程}

\begin{itemize}

\item  {\color{red} 问题：写出多元线性回归模型的理论方程。} 

\item 解答：多元线性回归模型的理论方程是：
\begin{eqnarray}
y=\beta_0+\beta_1x_1+\beta_2 x_2+\cdots+\beta_px_p+\varepsilon.
\end{eqnarray}

\item 假设：
\begin{itemize}
\item 误差项 $\varepsilon$ 是随机变量。
\item 自变量不是随机变量。
\item 因变量是随机变量。
\item 参数是未知的实数。
\end{itemize}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.1.3. 多元线性回归模型的样本方程}

\begin{itemize}
\item  {\color{red} 问题：设有观测数据如下，写出多元线性回归模型的样本方程，
\[ (x_{i1},x_{i2},\cdots,x_{ip},y_i),\quad 1\le i\le n. \]
}

\item 解答：多元线性回归模型如下，其中 $\beta_0,\beta_1,\cdots,\beta_p$ 为待估计的参数，
\[ {\color{red} y_i = \beta_0+\beta_1x_{i1}+\beta_2x_{i2}+\cdots+\beta_{p}x_{ip}+\varepsilon_i,\,\,\, i=1,2,\cdots,n} \]

\item  {\color{red} 问题：什么是设计矩阵？}
\item 解答：设计矩阵是自变量的数据放在一起组成的矩阵，每行一个记录。
\begin{eqnarray*}
X = \begin{bmatrix}
1 & x_{11} & x_{12} & \cdots & x_{1p}  \\
1 & x_{21} & x_{22} & \cdots & x_{2p}  \\
\vdots & \vdots & \vdots &  & \vdots \\
1 & x_{n1} & x_{n2} & \cdots & x_{np}  \\
\end{bmatrix}.
\end{eqnarray*}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.1.4. 多元线性回归模型的基本假设}

\begin{itemize}

\item  {\color{red} 问题：写出多元回归模型的基本假设。}

%\item 解答：

\begin{enumerate}

\item 自变量 $x_1,x_2,\cdots,x_p$ 是确定的观测值，相互不影响。%具体地说，是指上述表格的 $p$ 个列向量线性无关，最好是互相垂直。

\item 因变量与自变量之间的关系是近似线性的，即 
\[ y_i = \beta_0+\beta_1x_{i1}+\beta_2x_{i2}+\cdots+\beta_{p}x_{ip}+\varepsilon_i,\,\,\, i=1,2,\cdots,n\]

\item (Gauss-Markov条件) 误差项 $\varepsilon_1,\varepsilon_2,\cdots,\varepsilon_n$ 是均值为零、方差相同、且两两不相关的随机变量，即
{\color{red}
\begin{eqnarray*}
\left\{\begin{array}{ll}
\mathbb{E}(\varepsilon_i) = 0, \,\,\, \textrm{var}(\varepsilon_i) = \sigma^2, & i=1,2,\cdots n\\
\textrm{cov}(\varepsilon_i,\varepsilon_j) = 0, &  i\neq j, \,\, i,j=1,2,\cdots n
\end{array}\right.
\end{eqnarray*}}

\vspace{-0.4cm}

\item 我们还经常假设误差项服从独立同分布的正态分布，即 
\begin{eqnarray*}
\left\{\begin{array}{ll}
\varepsilon_i \sim N(0,\sigma^2), & i=1,2,\cdots n\\
\textrm{cov}(\varepsilon_i,\varepsilon_j) = 0, &  i\neq j, \,\, i,j=1,2,\cdots n
\end{array}\right.
\end{eqnarray*}

\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.1.5. 多元线性回归模型的矩阵形式}

\begin{itemize}
\item  {\color{red} 问题：线性回归方程的矩阵形式是什么？}

\item 解答：矩阵形式是 $\boxed{\color{red}{\bf y} = X\beta+\varepsilon}$, 即
\begin{eqnarray*}
\begin{bmatrix} y_{1} \\ y_{2} \\ \vdots \\ y_{n}  \end{bmatrix}
=\begin{bmatrix}
1 & x_{11} & x_{12} & \cdots & x_{1p}  \\
1 & x_{21} & x_{22} & \cdots & x_{2p}  \\
\vdots & \vdots & \vdots &  & \vdots \\
1 & x_{n1} & x_{n2} & \cdots & x_{np}  \\
\end{bmatrix}\cdot
\begin{bmatrix} \beta_0 \\ \beta_1\\ \vdots \\ \beta_p \end{bmatrix}
+
\begin{bmatrix} \varepsilon_{1} \\ \varepsilon_{2} \\ \vdots \\ \varepsilon_{n}  \end{bmatrix}
\end{eqnarray*}

\item 注解：这时，关于误差项的正态假设可以简洁地写为
\[ \varepsilon \sim N(0,\sigma^2 E_n),\]
其中 $E_n$ 是 $n$ 阶单位矩阵。例如 $E_2=\begin{bmatrix} 1&0 \\ 0&1\end{bmatrix}$. 

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.1.6. 例子}

\begin{itemize}
\item  {\color{red} 问题：设变量 $y$ 表示空调机的销售量，变量 $x_1$ 表示空调机的价格，变量 $x_2$ 表示消费者的可支配收入。建立线性回归模型 
\begin{eqnarray}
y=\beta_0+\beta_1x_1+\beta_2x_2+\varepsilon.
\end{eqnarray}
问题：从微观经济学的角度，解释参数 $\beta_1,\beta_2$ 的含义，并确定正负号。
}

\item 解答：

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.1.7. 多元线性回归的图像}

\begin{itemize}
\item  {\color{red} 问题：多元线性回归的图像是什么样的？}

解答：设 $p=2$, 如果样本数据 $(x_{i1},x_{i2},y_i)$ 的散点图大致落在某个平面的两侧附近，那么这个平面就是这个二元线性回归的拟合平面。

\begin{center}
\includegraphics[height=0.5\textheight,width=0.7\textwidth]{regression-plane.png}
\end{center}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{3.1.8. 画出回归平面}

\begin{itemize}

\item  {\color{red} 问题：画出一个二元线性回归模型的散点图和拟合平面。}

%
%1974 Motor Trend US magazine.
%

%
%\begin{eqnarray}
%T(kp(x)+\ell q(x)) &=& x((kp(x)+\ell q(x)))'=kxp'(x)+\ell xq'(x)\\
%kT(p(x))+\ell T(q(x)) &=& kx'p(x)+\ell xq'(x)).
%\end{eqnarray}
%
%\begin{eqnarray}
%q(x)= x^3, [q]_\mathcal{N} = (0,0,0,1)^t. \\
%h(x)= T(q(x)) = xq'(x) = 3x^3, h_\mathcal{N} = (0,0,0,3)^t.
%\end{eqnarray}

\item 解答：查看文件 \,{\color{blue}plotly-3d-regression-plane.R.}

{\color{blue}
\begin{verbatim}
> source('plotly-3d-regression-plane.R')
> ?mtcars
> mtcars
\end{verbatim}
}

\item 参考网页：
{\color{blue}
\url{https://www.tutorialspoint.com/r/r_multiple_regression.htm}
\url{https://plotly.com/r/3d-charts/}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.1.9. 思考题}

\begin{itemize}
\item  {\color{red} 问题：逐条解释多元线性回归模型的基本假设，}
\begin{enumerate}
\item 关于自变量与设计矩阵的假设。
\item 关于随机误差项的高斯-马尔可夫条件。
\item 关于随机误差项的正态假设。
\end{enumerate}

\item 解答：

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.2.1. 普通最小二乘法的思路}

\begin{itemize}
\item  {\color{red} 问题：%考虑多元线性回归模型 $y=\beta_0+\beta_1x_1+\cdots+\beta_px_p+\varepsilon$.
%设样本数据为 $$(x_{i1},x_{i2},\cdots,x_{ip},y_i), 1\le i\le n.$$
%求参数 $\beta_0,\beta_1,\cdots,\beta_p$ 使得下述残差平方和达到最小：
%$$Q=\sum\limits_{i=1}^n (y_i-\beta_0+\beta_1x_1+\cdots+\beta_px_p)^2.$$
考虑二元线性回归模型 $y=\beta_0+\beta_1x_1+\beta_2x_2+\varepsilon$.
设样本数据为 $$(x_{i1},x_{i2},y_i), 1\le i\le n.$$
用最小二乘法来估计参数 $\beta_0,\beta_1,\beta_2$ 的思路是什么？
}

\item 解答：求参数 $\beta_0,\beta_1,\beta_2$ 使得下述残差平方和达到最小，
$$Q=\sum\limits_{i=1}^n (y_i-\beta_0-\beta_1x_{i1}-\beta_2x_{i2})^2.$$
换句话说，
$$ (\hat{\beta}_0,\hat{\beta}_1,\hat{\beta}_2) = \underset{\beta_0,\beta_1,\beta_2}{\text{argmin}}\sum\limits_{i=1}^n (y_i-\beta_0-\beta_1x_{i1}-\beta_2x_{i2})^2.$$



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.2.2. 最小二乘估计的公式}

\begin{itemize}
\item 记号：下述矩阵分别是设计矩阵，因变量数据向量，参数向量，误差项向量。
\begin{eqnarray*}
X=\begin{pmatrix} 1&x_{11}&x_{12} \\ 1&x_{21}&x_{22} \\ \vdots&\vdots&\vdots \\ 1&x_{n1}&x_{n2} \end{pmatrix},
\hspace{0.1cm}
y=\begin{pmatrix} y_1 \\ y_2 \\ \vdots \\ y_n \end{pmatrix},
\hspace{0.1cm}
\beta=\begin{pmatrix} \beta_0 \\ \beta_1 \\ \beta_2 \end{pmatrix},
\hspace{0.1cm}
\hat{\beta}=\begin{pmatrix} \hat{\beta}_0 \\ \hat{\beta}_1 \\ \hat{\beta}_2 \end{pmatrix},
\hspace{0.1cm}
\varepsilon=\begin{pmatrix} \varepsilon_1 \\  \varepsilon_2 \\ \vdots \\  \varepsilon_n \end{pmatrix}.
\end{eqnarray*}

\item   {\color{red} 问题：写出参数的最小二乘估计的公式，和因变量的拟合值（回归值）。}

\item 解答：参数的最小二乘估计和因变量的拟合值分别为
\begin{eqnarray}
\hat{\beta} &=& (X^{\,t}X)^{-1}X^{\,t}y, \\
\hat{y} &=& X\hat{\beta}=X(X^{\,t}X)^{-1}X^{\,t}y.
\end{eqnarray}

%经验回归方程为 $$\hat{y}=\hat{\beta}_0 +\hat{\beta}_1x_1 +\hat{\beta}_2x_2. $$

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.2.3. 帽子矩阵及其性质}

\begin{itemize}

\item  {\color{red} 问题：什么是帽子矩阵？解释它也称为投影矩阵的原因。}

\item 解答：称 $H:=X(X^{\,t}X)^{-1}X^{\,t}$ 为帽子矩阵，因为 $\hat{y} = Hy$. 

\begin{center}
\includegraphics[height=0.6\textheight,width=0.8\textwidth]{regression-projection-a.jpeg}
\end{center}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.2.4. 帽子矩阵是幂等的}

\begin{itemize}

\item  {\color{red} 问题：考虑二元线性回归模型，即 $p=2$, 证明 
\begin{eqnarray}
H^{\,2} = H, \,\, \text{tr}(H) = 3.
\end{eqnarray}
}

\item 证明：直接计算。使用矩阵的迹的性质：$\text{tr}(AB)=\text{tr}(BA)$. 

\item 问题：矩阵 $H$ 的对角线元素为 $h_{ii}$. 用自变量数据 $\{x_{ij}\}$ 表示 $h_{11}$.
\begin{eqnarray*}
X=\begin{pmatrix} 1&x_{11}&x_{12} \\ 1&x_{21}&x_{22} \\ \vdots&\vdots&\vdots \\ 1&x_{n1}&x_{n2} \end{pmatrix}.
\end{eqnarray*}

\item 定义：设计矩阵 $X$ 的列向量张成的子空间称为矩阵 $X$ 的列空间，记为 $$\text{col}(X).$$

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.2.5. 残差向量}

\begin{itemize}
\item  {\color{red} 定义：称 $(e_1,e_2,\cdots,e_n)=e=y-\hat{y}$ 为回归残差向量。}

\item  {\color{red} 问题：计算残差向量 $e$ 的协方差矩阵 $\text{cov}(e,e)$.}

\item 解答：
\begin{eqnarray*}
\text{var}(e) = \text{cov}(e,e) 
  &=& \text{cov}((E_n-H)y,(E_n-H)y) \\
  &=& (E_n-H)\text{cov}(y,y)(E_n-H)^{\, t} \\
  &=& (E_n-H) \sigma^2E_n(E_n-H)\\
  &=& \sigma^2(E_n-H).
\end{eqnarray*}

\item 结论：残差之间是不独立的，特别地，有
\begin{eqnarray*}
\text{cov}(e_i,e_j) &=& -h_{ij}\sigma^2, \,\, i\neq j, \\
\text{var}(e_i) &=& (1-h_{ii})\sigma^2.
\end{eqnarray*}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.2.6. 残差向量与自变量向量相互垂直}

\begin{itemize}
\item  {\color{red} 问题：考虑二元线性回归模型。画图说明自变量向量，因变量向量，残差向量，并解释残差满足关系式
$$\sum\limits_{i=1}^n e_i=0, \sum\limits_{i=1}^n e_ix_{i1}=0, \sum\limits_{i=1}^n e_ix_{i2}=0.$$
}

%\item 解答：

\begin{center}
\includegraphics[height=0.45\textheight,width=0.8\textwidth]{regression-projection-b.jpeg}
\end{center}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.2.7. 残差向量与自变量向量相互垂直的证明}

\begin{itemize}
\item  {\color{red} 问题：证明自变量向量与残差向量垂直。}

\item 解答：这是最小二乘法的自然结果。
\begin{eqnarray*}
\left\{\begin{array}{rcl}
\frac{\partial Q}{\partial \beta_0} &=& \sum\limits_{i=1}^n (y_i - \beta_0 - \beta_1x_{i1} - \beta_2x_{i2})(-2) =0, \\
\frac{\partial Q}{\partial \beta_1} &=&  \sum\limits_{i=1}^n (y_i - \beta_0 - \beta_1x_{i1} - \beta_2x_{i2})(-2x_{i1}) =0, \\
\frac{\partial Q}{\partial \beta_2} &=& \sum\limits_{i=1}^n (y_i - \beta_0 - \beta_1x_{i1} - \beta_2x_{i2})(-2x_{i2}) =0.
\end{array}\right.
\end{eqnarray*}
最小二乘法得到的 $\hat{\beta}_0, \hat{\beta}_1,\hat{\beta}_2$ 是上述三个方程的解。 而残差的定义就是
$$ e_i := y_i - \hat{\beta}_0 - \hat{\beta}_1x_{i1} - \hat{\beta}_2x_{i2}. $$

\item 注释：上一页的图中，没有画出全1向量，即 $(1,1,\cdots,1)$. 

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.2.8. 误差项方差的估计}

\begin{itemize}
\item  {\color{red} 问题：考虑二元线性回归模型。由基本假设，误差项 $\varepsilon_1,\cdots,\varepsilon_n$ 是等方差的。
设 $\text{var}(\varepsilon_i)=\sigma^2$.证明下式是 $\sigma^2$ 的无偏估计，
$$\hat{\sigma}^2 = \frac{1}{n-3}\sum\limits_{i=1}^n e_i^2.$$
}

\item 证明：由三个等式可知，
\begin{eqnarray*}
\text{var}(e_i) &=& \text{E}(e_i^2) - \text{E}(e_i)^2, \\
\text{var}(e) &=& \sigma^2(E_n-H), \\
\text{tr}(H) &=& 3.
\end{eqnarray*}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.2.9. 例子3-1的变量及其含义}

\begin{itemize}
\item  {\color{red} 问题：要研究的因变量 $y$ 是城镇居民每人每年的消费支出。自变量有：
\begin{table}[ht]
\centering
\begin{tabular}{|c|c|}\hline
自变量 & 含义 \\ \hline
$x_1$ & 食品花费 \\ \hline
$x_2$ & 衣着花费 \\ \hline
$x_3$ & 居住花费 \\ \hline
$x_4$ & 医疗保健 \\ \hline
$x_5$ & 文教娱乐 \\ \hline
$x_6$ & 地区的职工平均工资 \\ \hline
$x_7$ & 地区的人均GDP \\ \hline
$x_8$ & 地区的消费价格指数 \\ \hline
$x_9$ & 地区的失业率 \\ \hline
\end{tabular}
\end{table}
}

%\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{3.2.10. 例子3-1的回归模型的程序计算}

\begin{itemize}
\item  {\color{red} 问题：用R软件对例子3-1进行线性回归分析。}

\item 解答：
{\color{blue}
\begin{verbatim}
mydata <- read.table('ex3-1.csv',head=T)
lm01<-lm(y~x1+x2+x3+x4+x5+x6+x7+x8+x9,data=mydata)
summary(lm01)
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.2.11. 例子3-1的回归模型的结果解释}

\begin{itemize}
\item  {\color{red} 问题：分析得到的线性回归模型。}

\item 解答：考虑下述问题，
\begin{itemize}
\item 哪些变量对居民的消费性支出有显著影响？
\item 与经济理论相验证。
\end{itemize}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.2.12. 计算题}

\begin{itemize}

\item  {\color{red} 问题：考虑二元线性回归模型。设计矩阵为
\begin{eqnarray*}
X=\begin{pmatrix} 1&x_{11}&x_{12} \\ 1&x_{21}&x_{22} \\ \vdots&\vdots&\vdots \\ 1&x_{n1}&x_{n2} \end{pmatrix}.
\end{eqnarray*}
矩阵 $H=X(X^{\,t}X)^{-1}X^{\,t}$ 的对角线元素为 $h_{ii}$. 用自变量数据 $\{x_{ij}\}$ 表示 $h_{ii}$.
}

\item 解答：

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.3.1. 最小二乘估计量}

\begin{itemize}
\item  {\color{red} 问题：证明最小二乘估计 $\hat\beta$ 是因变量向量 $y$ 的一个线性函数。}

\item 记号：下述矩阵分别是设计矩阵，因变量数据，参数，和误差项。
{\small
\begin{eqnarray*}
X=\begin{pmatrix} 1&x_{11}&\cdots &x_{1p} \\ 1&x_{21}&\cdots &x_{2p} \\ \vdots&\vdots& &\vdots \\ 1&x_{n1}&\cdots &x_{np} \end{pmatrix},
\hspace{0.1cm}
y=\begin{pmatrix} y_1 \\ y_2 \\ \vdots \\ y_n \end{pmatrix},
\hspace{0.1cm}
\beta=\begin{pmatrix} \beta_0 \\ \beta_1  \\ \cdots  \\ \beta_p \end{pmatrix},
\hspace{0.1cm}
\hat{\beta}=\begin{pmatrix} \hat{\beta}_0 \\ \hat{\beta}_1 \\ \cdots  \\ \hat{\beta}_p \end{pmatrix},
\hspace{0.1cm}
\varepsilon=\begin{pmatrix} \varepsilon_1 \\  \varepsilon_2 \\ \vdots \\  \varepsilon_n \end{pmatrix}.
\end{eqnarray*}
}

\item 解答：最小二乘法的目标函数 $Q = \varepsilon^t\cdot \varepsilon = (y-X\beta)^t(y-X\beta)$. 
对 $\beta$ 求导，得到 $\frac{\partial Q}{\partial \beta} = -2X^t(y-X\beta)$. 令其为零，可得 
$\hat{\beta}=(X^tX)^{-1}X^ty$. 这是 $y$ 的一个线性函数。
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.3.2. 最小二乘估计量的均值与协方差}

\begin{itemize}
\item  {\color{red} 问题：}
\begin{enumerate}
\item  {\color{red} 证明 $\hat\beta$ 是 $\beta$ 的无偏估计。}
\item  {\color{red} 证明最小二乘估计 $\hat\beta$ 的协方差矩阵等于 $\sigma^2 (X^tX)^{-1}$. }
\item  {\color{red} 写出 $p=1$ 时参数的最小二乘估计量，以及协方差矩阵。}
\end{enumerate}

\item 证明：
\begin{enumerate}
\item $\text{E}(\hat{\beta})=\text{E}((X^tX)^{-1}X^ty)$, 由 $y=X\beta+\varepsilon$ 以及 $\text{E}(\varepsilon)=0$ 得证。
\item $\text{cov}(\hat{\beta},\hat{\beta})=\text{cov}((X^tX)^{-1}X^ty,(X^tX)^{-1}X^ty)$, 由 $\text{cov}(y,y)=\sigma^2E_n$ 得证。
\item 首先计算 $X^tX$, 然后用上述公式即得。
\begin{eqnarray*}
X^tX=
\begin{pmatrix} 1&1&\cdots & 1 \\ x_{1} & x_{2} & \cdots & x_{n} \end{pmatrix}
\begin{pmatrix} 1&x_{1} \\ 1&x_{2} \\ \vdots & \vdots \\ 1&x_{n} \end{pmatrix}
=\begin{pmatrix} n& \sum x_i \\ \sum x_i & \sum x_i^2 \end{pmatrix}
\end{eqnarray*}

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.3.3. 最小二乘估计量的相关系数}

\begin{itemize}
\item  {\color{red} 问题：考虑一元线性回归模型 $y=\beta_0+\beta_1x_1+\varepsilon$，计算参数的最小二乘估计量之间的相关系数矩阵。}

\item 解答：随机变量 $X,Y$ 之间的相关系数定义为
\begin{eqnarray*}
R(X,Y) = \frac{\text{cov}(X,Y)}{\sqrt{\text{var}(X)}\sqrt{\text{var}(Y)}}. 
\end{eqnarray*}
因此 $R(\hat{\beta_0}, \hat{\beta_0})=1$, $R(\hat{\beta_1},\hat{\beta_1})=1$, 以及
\begin{eqnarray*}
R(\hat{\beta}_0,\hat{\beta}_1) 
= \frac{\text{cov}(\hat{\beta}_0,\hat{\beta}_1)} { \sqrt{\text{var}(\hat{\beta}_0)} \sqrt{\text{var}(\hat{\beta}_1)} }.
\end{eqnarray*}

\item 由 $\text{cov}(\hat{\beta},\hat{\beta})=\sigma^2 (X^tX)^{-1}$ 可知，参数估计的稳定性与设计矩阵 $X$ 有很大关系。


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.3.4. 高斯-马尔可夫定理}

\begin{itemize}
\item  {\color{red} 定理：设 $\text{E}(y)=X\beta$, 设 $\text{D}(y)=\sigma^2 E_n$, 这里 $E_n$ 是单位矩阵。
设 $c$ 是任意一个长度为 $p+1$ 的行向量，则 $c\hat{\beta}$ 是 $c\beta$ 的最小方差线性无偏估计，简称BLUE. }

\item 证明：

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.3.5. 参数估计量与残差的协方差}

\begin{itemize}
\item  {\color{red} 问题：证明参数的最小二乘估计与每个残差是不相关的，即有 $\text{cov}(\hat{\beta},e)=0$. }

\item 证明：

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.3.6. 参数估计量的分布}

\begin{itemize}
\item  {\color{red} 问题：假设误差项 $\varepsilon_i$ 服从正态分布 $N(0,\sigma^2)$. }
\begin{enumerate}
\item  {\color{red} 证明参数的最小二乘估计量也服从正态分布 $N(\beta,\sigma^2(X^tX)^{-1})$. }
\item  {\color{red} 记 $\text{SSE}$ 是残差平方和，证明 $\text{SSE}/\sigma^2$ 服从卡方分布 $\chi^2(n-p-1)$. }
\end{enumerate}

\item 证明：
\begin{enumerate}
\item 
\begin{enumerate}
\item 首先 $\hat{\beta}=(X^tX)^{-1}X^ty$ 是 $y=X\beta+\varepsilon$ 的一个线性函数。因此如果 $\varepsilon\sim N(0,\sigma^2E_n)$, 则 $\hat{\beta}$ 也是一个正态分布的随机向量。
\item 分别计算 $\hat{\beta}$ 的均值和方差，立刻得证。
\end{enumerate}

\item 
\begin{enumerate}
\item $\text{SSE} = e^t\cdot e = (y-X\hat{\beta})^t(y-X\hat{\beta})$. 这是 $(x_i,y_i)$ 的一个表达式。
\item 
\end{enumerate}
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{3.3.7. 例子3-1的参数估计量之间的相关系数}

\begin{itemize}
\item  {\color{red} 问题：计算例子3-1的参数的协方差矩阵和相关系数矩阵。}

\item 解答：数据文件在资料里下载。
{\footnotesize\color{blue}
\begin{verbatim}
> mydata<-read.table('ex3-1b.txt',head=T,sep='\t')
> lm3.1 <- lm(Y~x1+x2+x3+x4+x5+x6+x7+x8+x9, data=mydata)
> summary(lm3.1)
> sigma2<-sum(lm3.1$residuals^2)/(31-9-1)
> X1<-as.matrix(mydata[,1:9])  #取出数据框的前9列，并存为矩阵类型
> X<-cbind(1,X1)  #矩阵左边加一列1
> XX<-t(X)%*%X  #按照矩阵乘法，而不是分量分别相乘
> covbeta<-sigma2*solve(XX)  #计算参数向量的协方差矩阵
> r<-matrix(nrow=10,ncol=10)  #准备存放相关系数矩阵
> for (i in 1:10)
+ for (j in 1:10)
+ r[i,j]<-covbeta[i,j]/sqrt(covbeta[i,i]*covbeta[j,j])
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.3.8. 思考题}

\begin{itemize}
\item  {\color{red} 问题：证明 $\text{SSE}/\sigma^2$ 服从卡方分布 $\chi^2(n-p-1)$. }

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.4.1. 回归方程的显著性检验}

\begin{itemize}
\item  {\color{red} 问题：考虑二元线性回归模型 $y=\beta_0+\beta_1x_1+\beta_2x_2+\varepsilon$. 进行假设检验
$$H_0: \beta_1=\beta_2=0, \text{ vs } H_1: \exists i\in\{1,2\}, \beta_i\neq 0.$$ }

\item 解答：设显著性水平为 $\alpha$, 构造统计量 $$F=\frac{\text{SSR}/2}{\text{SSE}/(n-3)}.$$
\begin{enumerate}
\item 当零假设 $H_0$ 成立时，这个统计量服从分布 $F\sim F(2,n-3)$.  
\item 当统计值 $ f > F_\alpha(2,n-3)$ 时，拒绝零假设，认为模型显著。
\item 或者计算 $p$ 值，即 $p=\mathbb{P}(F>f)$. 当 $p<\alpha$ 时，拒绝 $H_0$, 认为模型显著。
\end{enumerate}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.4.2. 方差分析表}

\begin{itemize}
\item  {\color{red} 问题：什么是方差分析表？}

\item 解答：方差分析是指对因变量的离差平方和 $\text{SST}=\text{SSR}+\text{SSE}$ 进行各要素的分析。
{\small
\begin{table}[ht!]
\centering
\caption{方差分析表}\vspace{0.0cm}
\begin{tabular}{|c|c|c|c|c|c|}\hline
方差来源 & 自由度 & 平方和 & 均方 & F值 & p值 \\ \hline 
回归 & $p$ & $\text{SSR}$ & $\text{SSR}/p$ & $f=\frac{\text{SSR}/p}{\text{SSE}/(n-p-1)}$ & $\mathbb{P}(F>f)$ \\ \hline 
残差 & $n-p-1$ & $\text{SSE}$ & $\text{SSE}/(n-p-1)$ & &  \\ \hline 
总和 & $n-1$ & $\text{SST}$ & & &  \\ \hline 
\end{tabular}
\end{table}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{3.4.3. 例子3-1的方差分析(先引入的自变量较重要)}

\begin{itemize}
\item  {\color{red} 问题：对例子3-1的回归模型进行方差分析。}

%\item 解答：
{\tiny\color{blue}
\begin{verbatim}
> anova(lm3.1)
Analysis of Variance Table
Response: Y
          Df    Sum Sq   Mean Sq   F value    Pr(>F)    
x1         1 334652105 334652105 1384.4193 < 2.2e-16 ***
x2         1  40791159  40791159  168.7486 1.664e-11 ***
x3         1   9961772   9961772   41.2108 2.310e-06 ***
x4         1   4189106   4189106   17.3299 0.0004401 ***
x5         1  15019486  15019486   62.1340 1.043e-07 ***
x6         1     21791     21791    0.0901 0.7669437    
x7         1   1015073   1015073    4.1992 0.0531413 .  
x8         1    298174    298174    1.2335 0.2792803    
x9         1     43918     43918    0.1817 0.6742665    
Residuals 21   5076276    241727                        
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{3.4.4. 例子3-1的方差分析(自变量地位平等)}

\begin{itemize}
\item  {\color{red} 问题：使用 Type II 和 Type III 平方和进行方差分析。} 

\item 解答：载入 \,{\color{blue}\verb+car+} 程序包，使用 \,{\color{blue}\verb+Anova()+} 函数。
{\color{blue}
\begin{verbatim}
> install.packages('car')
> library(car)
> Anova(lm3.1, type='II')
\end{verbatim}
}

\end{itemize}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.4.5. 自变量的显著性检验}

\begin{itemize}
\item  {\color{red} 问题：在多元线性回归模型中，}
\begin{enumerate}
\item  {\color{red} 为什么需要对每个变量进行显著性检验？}
\item  {\color{red} 构造检验统计量的出发点是什么？}
\end{enumerate}

\item 解答：
\begin{enumerate}
\item 有时回归方程是显著的，但是变量不一定是显著的。有时候就需要剔除一些次要的变量。这就需要进行变量的显著性检验。
\item %检验统计量的构造依据是，
在误差的正态假设下，参数的最小二乘估计量服从正态分布
$$\hat{\beta} \sim N(\beta,\sigma^2(X^tX)^{-1}). $$
记矩阵 $(X^tX)^{-1}$ 的对角线元素为 $c_{00},c_{11},\cdots,c_{pp}$. 则 $$\hat{\beta}_i \sim N(\beta_i,\sigma^2c_{ii}).$$
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.4.6. 二元线性回归模型的变量的显著性检验}

\begin{itemize}
\item  {\color{red} 问题：考虑二元线性回归模型 $y=\beta_0+\beta_1x_1+\beta_2x_2+\varepsilon$. 进行假设检验
$$H_0: \beta_1=0, \text{ vs. } H_1: \beta_1\neq 0.$$ } 

\vspace{-0.6cm}

\item 解答：设显著性水平为 $\alpha$. 构造下述统计量，其中 $\hat{\sigma}^2 = \frac{1}{n-3}\sum\limits_{i=1}^n e_i^2$, 
 $$T_1=\frac{\hat{\beta}_1}{\sqrt{c_{11}}\hat{\sigma}}.$$
\begin{enumerate}
\item 当零假设为真时，这个统计量服从 $T_1\sim t(n-3)$. 
\item 计算统计值 $t_1$, 当 $|t_1|\ge t_{\alpha/2}(n-3)$ 时，拒绝零假设。
\item 或者计算 $p$ 值，$p=\mathbb{P}(|T_1|\ge |t_1|)$, 当 $p<\alpha$ 时，拒绝零假设。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{3.4.7. 例子3-1的变量的显著性检验}

\begin{itemize}
\item   {\color{red} 问题：对例子3-1的数据，对每个变量进行显著性检验。并剔除不显著的变量。}

\item 解答：可以查回归模型结果，也可以按公式一步步计算。
{\color{blue}
\begin{verbatim}
> summary(lm3.1)
> lm3.1.125 <- lm(Y~x1+x2+x5,data=mydata)
> summary(lm3.1.125)
> lm3.1.1235 <- lm(Y~x1+x2+x3+x5,data=mydata)
> summary(lm3.1.1235)
\end{verbatim}
}

\item 注意：剔除一些变量，会使得其余变量的显著性发生变化。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.4.8. 偏回归平方和的概念}

\begin{itemize}
\item  {\color{red} 问题：以二元线性回归模型为例，解释什么是偏回归平方和。}

\item 解答：设样本数据为 $(x_{i1},x_{i2},y_i),1\le i\le n$.
\begin{enumerate}
\item 考虑二元线性回归模型 $y=\beta_0+\beta_1x_1+\beta_2x_2+\varepsilon$, 
记残差平方和为 $\text{SSE}$, 回归平方和为 $\text{SSR}$;

\item 考虑一元线性回归模型 $y=\beta_0+\beta_2x_2+\varepsilon$, 
记残差平方和为 $\text{SSE}_{(1)}$, 回归平方和为 $\text{SSR}_{(1)}$;

\item 称 $\Delta \text{SSR}_{(1)}:= \text{SSR} - \text{SSR}_{(1)}$ 为{\color{red}变量 $x_1$ 的偏回归平方和}。

\item 偏 $F$ 统计量是指 $$F_1:=\frac{\Delta \text{SSR}_{(1)}}{\text{SSE}/(n-3)}. $$

\end{enumerate}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.4.9. 用偏回归平方和来检验变量的显著性}

\begin{itemize}
\item   {\color{red} 问题：记 $T_1=\hat{\beta}_1/(\hat{\sigma}\sqrt{c_{11}})$ 是检验参数 $\beta_1$ 的显著性的统计量。
证明 $$F_1 = T_1^{\,\,2}.$$ 
}

\item 证明：

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.4.10. 参数的置信区间}

\begin{itemize}
\item  {\color{red} 问题：考虑二元线性回归模型 $y=\beta_0+\beta_1x_1+\beta_2x_2+\varepsilon$.
设误差项服从正态分布。求参数 $\beta_1$ 的置信区间。}

\item 解答：
\begin{enumerate}
\item 根据 $\hat{\beta}_1 \sim N(\beta_1,\sigma^2c_{11})$, 可得
$T_1 := \frac{\hat{\beta}_1-\beta_1}{\hat{\sigma}\sqrt{c_{11}}} \sim t(n-3). $
\item 给定置信水平 $\alpha$, 写出概率公式
$\mathbb{P}(-t_{\alpha/2} <T_1< t_{\alpha/2}) = 1-\alpha$. \\
这里 $t_{\alpha/2}$ 和 $-t_{\alpha/2}$ 是上下分位数。
\item 从上式括号中的两个不等式，解出参数 $\beta_1$ 的置信区间：
$$\hat{\beta}_1 - t_{\alpha/2}\hat{\sigma}\sqrt{c_{11}} < \beta_1 < \hat{\beta}_1 + t_{\alpha/2}\hat{\sigma}\sqrt{c_{11}}. $$
\end{enumerate}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.4.11. 概率密度函数与分位数（图像）}

\begin{center}
\includegraphics[height=0.7\textheight,width=0.9\textwidth]{beta-conf-int.png}
\end{center}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{3.4.12. 概率密度函数与分位数（代码）} 

\begin{itemize}
\item  {\color{red} 问题：画出 $t(n-p-1)$ 分布的概率密度函数的图像。}

\item 解答：设 $n=31,p=9,\alpha=0.05$. 
{\footnotesize\color{blue}
\begin{verbatim}
> n<-31; p<-9; x<-seq(-4,4,0.1)
> y<-dt(x,df=n-p-1)
> plot(x,y,type='l')
> abline(h=0)
> ta<-qt(0.025,df=n-p-1)
> x1<-x[x<=ta]; y1<-y[x<=ta]
> polygon(c(x1,tail(x1,1),head(x1,1)),c(y1,0,0),col='skyblue')
> tb<-qt(0.975,df=n-p-1)
> x2<-x[x>=tb]; y2<-y[x>=tb]
> polygon(c(x2,tail(x2,1),head(x2,1)),c(y2,0,0),col='skyblue')
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.4.13. 复相关系数}

\begin{itemize}
\item 定义：考虑二元线性回归模型 $y=\beta_0+\beta_1x_1+\beta_2x_2+\varepsilon$, 设残差平方和为 $\text{SSE}$, 回归平方和为 $\text{SSR}$, 总离差平方和为 $\text{SST}$. 称 $R=\sqrt{R^{\,2}}=\sqrt{\frac{\text{SSR}}{\text{SST}}}$ 为因变量 $y$ 关于自变量 $x_1,x_2$ 的{\color{red}样本复相关系数}。称 $R^{\,2}=\frac{\text{SSR}}{\text{SST}}$ 为{\color{red}样本决定系数}。

\item  {\color{red} 问题：样本复相关系数 $R$ 衡量了什么？}

\item 解答：$R$ 衡量了自变量向量 $(x_1,x_2)$ 与因变量 $y$ 的线性关系。

\item  {\color{red} 问题：写出调整的 $R^{\,2}$ 的定义。 } %写出 $R^{\,2}_{a}$ 与 $R^{\,2}$ 的关系。


\item 解答：

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{3.4.14. 思考题} 

\begin{itemize}
\item  {\color{red} 问题：画出 $F(p-1,n-p-1)$ 分布的概率密度函数的图像。}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.5.1. 舍入误差}

\begin{itemize}
\item  {\color{red} 问题：什么是舍入误差？产生舍入误差的原因是什么？}

\item 解答：自变量的观测数值是近似的，在多元线性回归中，存在运算的四舍五入等操作，这产生了舍入误差。
原因：
\begin{itemize}
\item  不同的自变量有不同的单位，数据大小差别会很大。
\item  当设计矩阵 $X$ 的列向量之间近似线性相关，那么 $X^{\,t}X$ 的行列式的值会接近零，这样求逆阵 $(X^{\,t}X)^{-1}$ 就会产生较大误差。举例解释：
\begin{eqnarray}
X^{\,t}X = \begin{pmatrix} 1&1& \cdots &1 \\  x_1&x_2&\cdots& x_n  \end{pmatrix}
\begin{pmatrix} 1&x_1 \\ 1&x_2 \\ \vdots &\vdots \\  1&x_n   \end{pmatrix}.
\end{eqnarray}

\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.5.2. 数据的中心化}

\begin{itemize}

\item  {\color{red} 问题：多元线性回归模型的经验回归方程是什么？
什么是数据的中心化？中心化之后的经验回归方程是什么？}

\item 解答：多元线性回归模型的经验回归方程是
\begin{eqnarray}
\hat{y} = \hat{\beta}_0 + \hat{\beta}_1x_1+\hat{\beta}_2x_2+\cdots+\hat{\beta}_px_p.
\end{eqnarray}

将坐标原点移动到样本中心，就称为{\color{red}数据的中心化}。设样本数据为 
\begin{eqnarray}
(x_{i1},x_{i2},\cdots,x_{ip},y_i), 1\le i\le n.
\end{eqnarray} 
则样本中心为 $(\bar{x}_1,\bar{x}_2,\cdots,\bar{x}_p,\bar{y})$. 数据的中心化是指 
\begin{eqnarray}
x_{ij}^{\,'}=x_{ij}-\bar{x}_j,\,\, y_i^{\,'}=y_i-\bar{y},\,\,\, 1\le i\le n, 1\le j\le p.
\end{eqnarray}
数据中心化之后的经验回归方程是
\begin{eqnarray}
\hat{y}^{\,'} = \hat{\beta}_1x_{1}^{\,'}+\hat{\beta}_2x_{2}^{\,'} + \cdots + \hat{\beta}_p x_{p}^{\,'}.
\end{eqnarray}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.5.3. 数据的标准化}

\begin{itemize}
\item  {\color{red} 问题：什么是数据的标准化？为什么要对数据做标准化处理？写出数据标准化之后的样本回归方程。}

\item 解答：因为不同的自变量往往有不同的单位，数据的大小差异也很大，这就很难把它们放在同一个方程里。
设观测到的样本数据为 
\begin{eqnarray}
(x_{i1},x_{i2},\cdots,x_{ip},y_i), 1\le i\le n.
\end{eqnarray}
则这个样本数据{\color{red}标准化}是指
\begin{eqnarray}
x_{ij}^{\,*}=(x_{ij}-\bar{x}_j)/\hat{\sigma}_j,\,\, y_i^{\,*}=(y_i-\bar{y})/\hat{\sigma}_y,\,\,\, 1\le i\le n, 1\le j\le p.
\end{eqnarray}
其中 $\hat{\sigma}_j$ 是自变量 $x_j$ 的数据的样本标准差，$\hat{\sigma}_y$ 是因变量 $y$ 的数据的样本标准差。
数据标准化之后的样本回归方程是
\begin{eqnarray}
\hat{y}^{\,*} = \hat{\beta}_1^*x_1^{\,*}+\hat{\beta}_2^*x_2^{\,*}+\cdots+\hat{\beta}_p^*x_p^{\,*}.
\end{eqnarray}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.5.4. 数据标准化对系数的影响}

\begin{itemize}
\item  {\color{red} 问题：数据标准化的样本回归方程的系数 $\hat{\beta}_j^*$ 和原始数据的样本回归方程的系数 $\hat{\beta}_j$ 的关系是什么？}

\item 解答：由数据的标准化的计算公式，可知 
\begin{eqnarray}
\hat{\beta}_j^{\,*} = \frac{\hat{\sigma}_j}{\hat{\sigma}_y}\hat{\beta}_j. 
\end{eqnarray}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.5.5. 将数据标准化的好处}

\begin{itemize}
\item  {\color{red} 例子：将数据标准化有哪些好处？}

\item 解答：
\begin{itemize}
\item  容易比较自变量对因变量的影响的相对重要性。
\item  容易计算样本相关矩阵。
\item  模型为过原点的回归，减少一个参数。
\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.6.1. 简单相关系数的定义}

\begin{itemize}
\item   {\color{red} 问题：什么是下述两组数据的简单相关系数？}
\begin{eqnarray}
x &=& (x_1,x_2,\cdots,x_n),\\
y &=& (y_1,y_2,\cdots,y_n).
\end{eqnarray}

\item 解答：简单相关系数也称Pearson相关系数，是指
\begin{eqnarray}
r_{xy}=\frac{L_{xy}}{\sqrt{L_{xx}L_{yy}}}
=\frac{ \sum\limits_{i=1}^{n} (x_i-\bar{x})(y_i-\bar{y})} {\sqrt{ \sum\limits_{i=1}^{n} (x_i-\bar{x})^2 \sum\limits_{i=1}^{n} (y_i-\bar{y})^2}}.
\end{eqnarray}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.6.2. 样本相关矩阵的定义}

\begin{itemize}
\item  {\color{red} 问题：什么是样本相关矩阵，什么是增广的样本相关矩阵？}

\item 解答：设样本数据为 $(x_{i1},x_{i2},\cdots,x_{ip},y_i), 1\le i\le n$.
则自变量的样本相关矩阵和增广的样本相关矩阵分别为
\begin{eqnarray}
r=\begin{pmatrix} 
1      & r_{12} &\cdots & r_{1p} \\ 
r_{21} & 1      &\cdots & r_{2p} \\ 
\vdots & \vdots &\vdots & \vdots \\ 
r_{p1} & r_{p2} &\cdots & 1 
\end{pmatrix}, \hspace{0.3cm}
r=\begin{pmatrix} 
1      & r_{y1} &r_{y2} &\cdots & r_{yp} \\ 
r_{1y} & 1 &  r_{12}&\cdots & r_{1p} \\ 
r_{2y} & r_{21} &  1 &\cdots & r_{2p} \\ 
\vdots & \vdots &\vdots &\vdots & \vdots \\ 
r_{py} & r_{p1} & r_{p2} &\cdots & 1 
\end{pmatrix}.
\end{eqnarray}

\item  注解：增广的样本相关矩阵就是把因变量 $y$ 也放在内的样本相关矩阵，这是一个 $p+1$ 阶的矩阵。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{3.6.3. 计算样本相关矩阵的例子} 

\begin{itemize}
\item  {\color{red} 问题：计算例子3-1中的增广的样本相关矩阵。}

\item 解答：

{\color{blue}
\begin{verbatim}
> mydata<-read.table('data-example-3-1.txt',head=T,sep='')
> options(digits=3)
> cor(mydata)
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.6.4. 偏决定系数的定义}

\begin{itemize}
\item  {\color{red} 问题：以二元线性回归模型为例，解释什么是偏决定系数。}

\item 解答：
\begin{itemize}
\item  记模型 $y=\beta_0+\beta_2x_2+\varepsilon$ 的残差平方和为 $\text{SSE}(x_2)$;
\item  记模型 $y=\beta_0+\beta_1x_1+\beta_2x_2+\varepsilon$ 的残差平方和为 $\text{SSE}(x_1,x_2)$. 
\item  则残差平方和的相对减少量 
\begin{eqnarray}
r_{y1;2}^{\,2} = \frac{\text{SSE}(x_2) - \text{SSE}(x_1,x_2)}{\text{SSE}(x_2)}
\end{eqnarray}
称为{\color{red}在模型中已有变量 $x_2$ 时，变量 $y$ 与变量 $x_1$ 的偏决定系数}。
\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.6.5. 偏相关系数的定义}

\begin{itemize}
\item  {\color{red} 问题：以二元线性回归模型为例，解释什么是偏相关系数。}

\item 解答：
\begin{itemize}
\item  考虑二元线性回归模型 $y=\beta_0+\beta_1x_1+\beta_2x_2+\varepsilon$.
\item  记 $\text{sgn}(\beta_1)=\pm 1$ 是参数 $\beta_1$ 的正负号。
\item  在模型中已有 $x_2$ 时，$y$ 与 $x_1$ 的{\color{red}偏相关系数}是指
\begin{eqnarray}
\text{sgn}(\beta_1)\cdot\sqrt{r_{y1;2}^{\,2}}.
\end{eqnarray}
\item  偏相关系数是偏决定系数的平方根，符号由回归系数的符号确定。
\end{itemize}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.6.6. 例子3-2. 经济发展与招商投资}

\begin{itemize}
\item  {\color{red} 问题：北京市各经济开发区的经济发展与招商投资的关系。}
\begin{table}[ht]
\centering
\begin{tabular}{|c|c|}\hline
变量 & 含义 \\ \hline
$y$ & 开发区的销售收入 \\ \hline
$x_1$ & 开发区的累计招商数目 \\ \hline
$x_2$ & 招商企业的注册资本 \\ \hline
\end{tabular}
\end{table}
\begin{enumerate}
\item 计算线性回归模型。检验参数和模型的显著性。
\item 计算残差平方和 $\text{SSE}(x_1)$, $\text{SSE}(x_2)$ 和 $\text{SSE}(x_1,x_2)$.
\item 计算偏决定系数 $r_{y1;2}^2$ 和偏相关系数 $r_{y1;2}$. 
\end{enumerate}

\item 数据：资料 - 数据文件 - data-example-3-2.txt.

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{3.6.7. 例子3-2. 经济发展与招商投资（结论）}

\begin{enumerate}
\item 经验回归模型：$\hat{y} = -327.039 + 2.036 x_1 + 0.468 x_2$.
\item 检验参数和模型都是显著的。
\item 残差平方和 
\begin{eqnarray}
\text{SSE}(x_1) &=& 5983426, \\
\text{SSE}(x_2)&=& 7607485, \\
\text{SSE}(x_1,x_2)&=& 2716078.
\end{eqnarray}

\item 偏决定系数和偏相关系数分别为
\begin{eqnarray}
r_{y1;2}^2 &=& \frac{\text{SSE}(x_2) - \text{SSE}(x_1,x_2)}{\text{SSE}(x_2)} = \frac{7607485 - 2716078}{7607485}=0.643, \\
r_{y1;2} &=& \sqrt{r_{y1;2}^2} = \sqrt{0.643} =0.802. 
\end{eqnarray}

\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{3.6.8. 例子3-2. 经济发展与招商投资（程序）} 

\begin{itemize}
\item 程序：
{\color{blue}
\begin{verbatim}
> mydata<-read.table('data-example-3-2.txt',head=T,sep='\t')
> r<-cor(mydata)
> lm01<-lm(y~x1+x2,data=mydata)
> summary(lm01)
> anova(lm(y~x1,data=mydata))
> anova(lm(y~x2,data=mydata))
> anova(lm01)
\end{verbatim}
}

\item  注解：要从方差分析表里找出各个残差平方和。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{3.6.9. 偏相关系数和简单相关系数的关系} 

\begin{itemize}
\item  {\color{red} 定理：偏相关系数 $r_{12;3}$ 和简单相关系数 $r_{12},r_{13},r_{23}$ 之间的一个关系式
\begin{eqnarray}
r_{12;3} = \frac{r_{12}-r_{13}r_{23}}{\sqrt{(1-r_{13}^2)(1-r_{23}^2)}}.
\end{eqnarray}
}

\item  {\color{red} 问题：验证在例子3-2中，成立
\begin{eqnarray}
r_{y1;2} = \frac{r_{y1}-r_{y2}r_{12}}{\sqrt{(1-r_{y2}^2)(1-r_{12}^2)}}.
\end{eqnarray}
}

\vspace{-1cm}

%\item 解答：
{\color{blue}
\begin{verbatim}
> ry1<-r[3,1] 
> ry2<-r[3,2]
> r12<-r[1,2]
> (ry1-ry2*r12)/sqrt((1-ry2^2)*(1-r12^2))
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{document}



